#!/usr/bin/python3

import os
import argparse
import yaml

import references_slicer
import references_classifier

class Configure(object):
    """Settings consumed by the reference classification entry points.

    The path attributes (``input_dir``, ``input_file``, ``output_file``) are
    not touched by :meth:`parse_config`; they start empty and are expected to
    be assigned by the caller. The remaining attributes are read from the
    ``configure`` mapping of a YAML file.
    """

    def __init__(self):
        # Paths: left empty here, assigned by the caller.
        self.input_dir = ""
        self.input_file = ""
        self.output_file = ""
        # Compared against a file name's suffix with the leading dot removed.
        self.file_extension = ""
        # Expressions handed to references_slicer.RefSlicer, in this order.
        # NOTE(review): presumably regular expressions -- confirm in RefSlicer.
        self.ref_start_expr = ""
        self.ref_end_expr = ""
        self.ref_line_head_expr = ""
        self.ref_line_tail_expr = ""
        # Handed to references_classifier.RefClassifier.
        self.similarity_threshold = 0.0

    def parse_config(self, config_path: str) -> None:
        """Populate the YAML-backed attributes from the file at *config_path*.

        The file must contain a top-level ``configure`` mapping with the keys
        ``file_extension``, ``ref_start_expr``, ``ref_end_expr``,
        ``ref_line_head_expr``, ``ref_line_tail_expr`` and
        ``similarity_threshold``.

        Raises:
            OSError: if the file cannot be opened.
            KeyError: if ``configure`` or one of the keys above is missing.
        """
        # The context manager closes the handle even when parsing fails.
        with open(config_path, "rb") as stream:
            # NOTE(review): FullLoader is not recommended for untrusted input;
            # consider yaml.safe_load if the config can come from outside.
            document = yaml.load(stream, Loader=yaml.FullLoader)

        section = document["configure"]

        self.file_extension = section["file_extension"]
        self.ref_start_expr = section["ref_start_expr"]
        self.ref_end_expr = section["ref_end_expr"]
        self.ref_line_head_expr = section["ref_line_head_expr"]
        self.ref_line_tail_expr = section["ref_line_tail_expr"]
        self.similarity_threshold = section["similarity_threshold"]

def classify_references_file(config: Configure):
    """Classify the references of the single file ``config.input_file``.

    A ``RefSlicer`` built from the four ``ref_*`` expressions of *config*
    opens the input file and collects its references; the resulting list is
    passed to a ``RefClassifier`` created with ``config.similarity_threshold``,
    which then exports to ``config.output_file``.
    """
    slicer_exprs = (
        config.ref_start_expr,
        config.ref_end_expr,
        config.ref_line_head_expr,
        config.ref_line_tail_expr,
    )
    ref_slicer = references_slicer.RefSlicer(*slicer_exprs)
    ref_slicer.open(config.input_file)
    ref_slicer.find_all_refs()

    ref_classifier = references_classifier.RefClassifier(config.similarity_threshold)
    found_refs = ref_slicer.get_ref_list()
    ref_classifier.refs_classify(found_refs)
    ref_classifier.export(config.output_file)

def classify_references_dir(config: Configure):
    """Classify the references of every matching file under ``config.input_dir``.

    The directory tree is walked bottom-up. Each file whose suffix (without
    the leading dot) equals ``config.file_extension`` is run through one
    shared ``RefSlicer`` (reset before each file), and its reference list is
    passed to one shared ``RefClassifier``. The classifier exports to
    ``config.output_file`` once, after the walk has finished.
    """
    ref_classifier = references_classifier.RefClassifier(config.similarity_threshold)
    ref_slicer = references_slicer.RefSlicer(
        config.ref_start_expr,
        config.ref_end_expr,
        config.ref_line_head_expr,
        config.ref_line_tail_expr,
    )

    for dir_path, _subdirs, file_names in os.walk(config.input_dir, topdown=False):
        for file_name in file_names:
            _, suffix = os.path.splitext(file_name)
            # splitext keeps the leading dot; strip it before comparing.
            if suffix[1:] == config.file_extension:
                ref_slicer.reset()
                ref_slicer.open(os.path.join(dir_path, file_name))
                ref_slicer.find_all_refs()
                ref_classifier.refs_classify(ref_slicer.get_ref_list())

    ref_classifier.export(config.output_file)

if __name__ == "__main__":
    # Command-line entry point: load the YAML config, overlay the paths given
    # on the command line, then classify either a single file (-f) or a whole
    # directory tree (-d). A non-empty -f takes precedence over -d.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", dest="config_file", help="configure file. default: config.yml", default="./config.yml")
    parser.add_argument("-f", dest="input_file", help="the file to classify.", default="")
    parser.add_argument("-d", dest="input_dir", help="the directory to classify.", default="./")
    parser.add_argument("-o", dest="output_file", help="the file to write the output to.", default="./ref_count.xlsx")

    args = parser.parse_args()

    conf = Configure()
    conf.parse_config(args.config_file)

    # Paths are not part of the YAML config; they always come from the CLI.
    conf.input_dir = args.input_dir
    conf.input_file = args.input_file
    conf.output_file = args.output_file

    if conf.input_file != "":
        classify_references_file(conf)
    else:
        classify_references_dir(conf)

    # The bare exit() builtin is injected by the site module and may be
    # missing (e.g. python -S, frozen builds); SystemExit is always available.
    raise SystemExit(0)